Antibiotics gene graph

Load generic libraries

source('configuration.r')

Load plot specific libraries

suppressMessages(library(igraph))
library(ggraph)
library(foreach)

Plasmids

Cluster plasmids by hierarchical clustering of the Mash distance matrix

## Pairwise plasmid distance matrix; its row names become the plasmid IDs below.
p.dist <- read.table('../tables/plasmid_mash_dist.dat')

## Single-linkage hierarchical clustering, cut at a fixed distance threshold.
cluster.cutoff <- 0.02
clustering <- hclust(as.dist(p.dist), method = 'single')
clusters <- data.frame(clusters = cutree(clustering, h = cluster.cutoff)) %>%
  rownames_to_column('plasmid_ID')

## Per-plasmid annotation table (tab separated, with a header line).
mapping.p <-
  read.table('../tables/plasmid_info.dat', stringsAsFactors = FALSE, sep = '\t', header = TRUE)

## Inner join on plasmid_ID: plasmids without an annotation row are dropped
## (original note: this merge removes duplicated libraries in the matrix).
plasmid.dat <- merge(clusters, mapping.p, by = 'plasmid_ID')

## write merged file for verification of the cluster using annotation
## NOTE(review): the file name says "hclust0.05" but the tree is cut at
## h = 0.02; the path is kept unchanged because other files may read it --
## confirm which value is intended.
write.table(plasmid.dat, '../output_tables/plasmid_info.hclust0.05.dat',
            sep = '\t', quote = FALSE, row.names = FALSE, col.names = TRUE)

Graph

## Linkage records between AR genes and plasmids (one row per record).
links.p <- read.table('../tables/antibiotics_gene_linkage.plasmid.tsv',
                      stringsAsFactors = FALSE, header = TRUE)

## Number of linkage records per (plasmid cluster, AR gene) pair.
## The ad hoc gene-name corrections must overwrite `ar` itself: writing them
## to a scratch column would leave the edge labels uncorrected, because only
## `ar` survives the select() further down.
pcluster.ar.links <- merge(links.p, plasmid.dat, by = "plasmid_ID") %>%
  select(plasmid_ID, clusters, ar = AR_gene) %>%
  count(clusters, ar) %>%
  mutate(ar = str_replace(ar, 'PheCmlA5', 'Phe')) %>% ## ad hoc correction
  mutate(ar = str_replace(ar, 'Far1_Fcd', 'Far1_Bla'))

## Edge score = records of the gene in the cluster / number of plasmids in
## the cluster.
pcluster.ar.links <- count(plasmid.dat, clusters, name = "cluster.size") %>%
  merge(pcluster.ar.links, by = "clusters") %>%
  mutate(score = n / cluster.size) %>%
  select(clusters, ar, score, cluster.size) %>%
  filter(cluster.size > 1) %>%   ## remove edges connecting to cluster with size 1
  mutate(clusters = str_c('#', clusters))  ## '#' prefix marks cluster vertices

## Undirected graph: the first two columns (clusters, ar) define the edges.
g <- graph_from_data_frame(pcluster.ar.links, directed = FALSE)

## type is TRUE for cluster vertices (names starting with '#'), FALSE for genes.
V(g)$type <- str_detect(V(g)$name, '^#')

## Gene vertices get a constant size; cluster vertices get their cluster size,
## looked up by vertex name so the result does not depend on row order.
V(g)$size <- 10
cluster.sizes <- unique(select(pcluster.ar.links, clusters, cluster.size))
V(g)$size[V(g)$type] <-
  cluster.sizes$cluster.size[match(V(g)$name[V(g)$type], cluster.sizes$clusters)]

## Append the cluster size to the cluster labels, e.g. "#3(12)".
V(g)$name[V(g)$type] <- paste0(V(g)$name[V(g)$type], "(", V(g)$size[V(g)$type], ")")

## One edge per data-frame row, in row order, so scores map directly to edges.
E(g)$weight <- pcluster.ar.links$score

## Plasmid-cluster / AR-gene network drawn with the 'fr' layout.
plasmid.graph.plot <- ggraph(g, layout = 'fr') +
  ## edge width follows the score; edges with a score of at least 1 get the
  ## second edge colour
  geom_edge_arc(
    aes(width = weight, col = weight >= 1),
    alpha = 0.4,
    curvature = 0.05,
    end_cap = circle(4, 'mm'),
    start_cap = circle(4, 'mm')
  ) +
  ## vertex shape and colour separate cluster vertices from gene vertices
  geom_node_point(aes(shape = type, size = size, color = type)) +
  geom_node_text(aes(label = name), size = 5, repel = TRUE, fontface = 'bold') +
  scale_edge_color_manual(values = c('black', 'red')) +
  scale_edge_width_continuous(range = c(0.1, 2)) +
  scale_radius(range = c(8, 20)) +
  scale_color_manual(values = pal_npg("nrc")(10)[2:3]) +
  theme_void() +
  scale_shape_manual(values = c(18, 19))

## Display the figure, then write the same plot object to disk.
plasmid.graph.plot

ggsave("../plots/fig3_ar_gene_graph_plasmid.pdf", plot = plasmid.graph.plot,
       height = 15, width = 25)

Genome

Genome data

## Genome table reduced to two columns: the species name (renamed to
## `clusters`) and the Nanopore_ID.
genome.dat <- select(
  read.table("../tables/genome_info.dat", header = TRUE, sep = '\t'),
  clusters = Species_name,
  Nanopore_ID
)

Graph

## Linkage records between AR genes and species-level genomes.
links.s <- read.table('../tables/antibiotics_gene_linkage.species.tsv',
                      stringsAsFactors = FALSE, header = TRUE)

## focus only on high/medium quality genomes: keep records whose first two
## columns match the first two columns of genome.dat
links.s <- merge(links.s, genome.dat, by = c(1, 2))

## Collapse multiple copies of a gene within one sample to a single row, then
## count the samples per (species, gene) pair.
links.dedup <- count(links.s, species, sample, AR_gene)
pcluster.ar.links <- links.dedup %>%
  select(clusters = species, ar = AR_gene) %>%
  count(clusters, ar) %>%
  mutate(
    ar = str_replace(ar, 'PheCmlA5', 'Phe'),      ## ad hoc correction
    ar = str_replace(ar, 'Far1_Fcd', 'Far1_Bla')
  )

## Edge score = samples carrying the gene / number of genomes of the species.
species.sizes <- count(genome.dat, clusters, name = "cluster.size")
pcluster.ar.links <- merge(species.sizes, pcluster.ar.links, by = "clusters") %>%
  mutate(score = n / cluster.size) %>%
  select(clusters, ar, score, cluster.size) %>%
  filter(cluster.size > 1) %>%   ## remove edges connecting to cluster with size 1
  mutate(clusters = str_replace(clusters, '_', ' '))

## Undirected graph: the first two columns (clusters, ar) define the edges.
g <- graph_from_data_frame(pcluster.ar.links, directed = FALSE)

## Species vertices are those whose name occurs in the `clusters` column.
is.species <- V(g)$name %in% unique(pcluster.ar.links$clusters)
V(g)$type <- is.species

## Gene vertices get a constant size; species vertices get their genome count.
V(g)$size <- 10
V(g)$size[is.species] <- unique(select(pcluster.ar.links, clusters, cluster.size))[, 2]

## Append the genome count to the species labels.
V(g)$name[is.species] <- paste0(V(g)$name[is.species], "(", V(g)$size[is.species], ")")

E(g)$weight <- pcluster.ar.links$score

## Species / AR-gene network drawn with the 'fr' layout.
genome.graph.plot <- ggraph(g, layout = 'fr') +
  ## edge width follows the score; edges with a score of at least 0.8 get the
  ## second edge colour
  geom_edge_arc(
    aes(width = weight, col = weight >= 0.8),
    alpha = 0.4,
    curvature = 0.05,
    end_cap = circle(4, 'mm'),
    start_cap = circle(4, 'mm')
  ) +
  ## vertex shape and colour separate species vertices from gene vertices
  geom_node_point(aes(shape = type, size = size, color = type)) +
  geom_node_text(aes(label = name), size = 5, repel = TRUE, fontface = 'bold') +
  scale_edge_color_manual(values = c('black', 'red')) +
  scale_edge_width_continuous(range = c(0.1, 2)) +
  scale_radius(range = c(8, 20)) +
  scale_color_manual(values = pal_npg("nrc")(10)[2:3]) +
  theme_void() +
  scale_shape_manual(values = c(18, 19))

## Display the figure, then write the same plot object to disk.
genome.graph.plot

ggsave("../plots/fig3_ar_gene_graph_genome.pdf", plot = genome.graph.plot,
       height = 25, width = 25)
## Record the R version and the attached/loaded package versions of this run.
sessionInfo()
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS release 6.9 (Final)
## 
## Matrix products: default
## BLAS: /usr/lib64/R/lib/libRblas.so
## LAPACK: /usr/lib64/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] foreach_1.4.4  ggraph_1.0.2   igraph_1.2.2   ggsci_2.9     
##  [5] reshape2_1.4.3 stringr_1.3.0  tibble_2.0.1   tidyr_0.8.3   
##  [9] dplyr_0.8.0.1  gridExtra_2.3  ggplot2_3.1.0 
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.0        pillar_1.3.1      compiler_3.4.4   
##  [4] plyr_1.8.4        iterators_1.0.9   viridis_0.5.1    
##  [7] tools_3.4.4       digest_0.6.18     viridisLite_0.3.0
## [10] evaluate_0.10.1   gtable_0.2.0      pkgconfig_2.0.2  
## [13] rlang_0.3.1       ggrepel_0.8.0     yaml_2.1.18      
## [16] withr_2.1.2       knitr_1.20        rprojroot_1.3-2  
## [19] tidyselect_0.2.5  glue_1.3.0        R6_2.4.0         
## [22] rmarkdown_1.9     farver_1.0        tweenr_1.0.0     
## [25] purrr_0.3.1       magrittr_1.5      codetools_0.2-15 
## [28] units_0.6-1       backports_1.1.2   scales_1.0.0     
## [31] htmltools_0.3.6   MASS_7.3-49       assertthat_0.2.0 
## [34] ggforce_0.1.3     colorspace_1.3-2  labeling_0.3     
## [37] stringi_1.3.1     lazyeval_0.2.1    munsell_0.5.0    
## [40] crayon_1.3.4